{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "23b9f1de-baae-40b2-ae01-b5cbbf5639ea",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import glob, os\n",
    "import socket \n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import pytz\n",
    "import datetime\n",
    "from linearmodels import PanelOLS\n",
    "\n",
    "pd.set_option('display.max_rows', 500)\n",
    "# \n",
    "mac_dir_compustat = '/Users/robertogomezcram/Dropbox/Cram_Olbert/Data/Compustat_financials/final/'\n",
    "mac_dir_TAQ = '/Users/robertogomezcram/Dropbox/Cram_Olbert/Data/TAQ_data/TaxDates_clean/' \n",
    "mac_dir_taxevents = '/Users/robertogomezcram/Dropbox/Cram_Olbert/Data/GlobalTaxReformEvents/'\n",
    "mac_dir_cds = '/Users/robertogomezcram/Dropbox/Cram_Olbert/Data/CDS/Daily/'\n",
    "mac_dir_countries = '/Users/robertogomezcram/Dropbox/Cram_Olbert/Data/Macro/'\n",
    "mac_dir_daily = '/Users/robertogomezcram/Dropbox/Cram_Olbert/Data/CRSP/' \n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "36ff95be-0e34-4f6a-abd4-84791568fe28",
   "metadata": {},
   "outputs": [],
   "source": [
    "# select the variables we use\n",
    "'Country_name','split_haven_list_all'\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "661912ce-20b5-4c31-a330-594adecd3a0d",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "ef47e91f-5eb5-45d7-9d6d-6bf06d8d3c84",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>country_alpha2</th>\n",
       "      <th>country_alpha3</th>\n",
       "      <th>country_name</th>\n",
       "      <th>continent</th>\n",
       "      <th>population</th>\n",
       "      <th>population_fill</th>\n",
       "      <th>gdp_capita_ppp</th>\n",
       "      <th>gdp_ppp_rel</th>\n",
       "      <th>gdp_total_WB</th>\n",
       "      <th>gdp_ppp</th>\n",
       "      <th>...</th>\n",
       "      <th>tax_accrued</th>\n",
       "      <th>tax_cash</th>\n",
       "      <th>employees</th>\n",
       "      <th>assets_tangible</th>\n",
       "      <th>jurisdiction_name</th>\n",
       "      <th>profit_pretax_0</th>\n",
       "      <th>Country_name</th>\n",
       "      <th>sort_dist_mintax</th>\n",
       "      <th>sort_dist_mintax_rev</th>\n",
       "      <th>sort_dist_mintax_profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td></td>\n",
       "      <td>CUW</td>\n",
       "      <td>cuw</td>\n",
       "      <td>check</td>\n",
       "      <td>0.155014</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20300.318359</td>\n",
       "      <td>1.212269</td>\n",
       "      <td>2.595822e+09</td>\n",
       "      <td>3.146834e+09</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td></td>\n",
       "      <td>MNP</td>\n",
       "      <td>mnp</td>\n",
       "      <td>check</td>\n",
       "      <td>0.057557</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.182000e+09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td></td>\n",
       "      <td>XKX</td>\n",
       "      <td>xkx</td>\n",
       "      <td>check</td>\n",
       "      <td>1.775378</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10795.495117</td>\n",
       "      <td>2.483643</td>\n",
       "      <td>7.716925e+09</td>\n",
       "      <td>1.916608e+10</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td></td>\n",
       "      <td>MAF</td>\n",
       "      <td>maf</td>\n",
       "      <td>check</td>\n",
       "      <td>0.038659</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td></td>\n",
       "      <td>IMN</td>\n",
       "      <td>imn</td>\n",
       "      <td>check</td>\n",
       "      <td>0.085032</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.315388e+09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 60 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "  country_alpha2 country_alpha3 country_name continent  population  \\\n",
       "0                           CUW          cuw     check    0.155014   \n",
       "1                           MNP          mnp     check    0.057557   \n",
       "2                           XKX          xkx     check    1.775378   \n",
       "3                           MAF          maf     check    0.038659   \n",
       "4                           IMN          imn     check    0.085032   \n",
       "\n",
       "   population_fill  gdp_capita_ppp  gdp_ppp_rel  gdp_total_WB       gdp_ppp  \\\n",
       "0              NaN    20300.318359     1.212269  2.595822e+09  3.146834e+09   \n",
       "1              NaN             NaN          NaN  1.182000e+09           NaN   \n",
       "2              NaN    10795.495117     2.483643  7.716925e+09  1.916608e+10   \n",
       "3              NaN             NaN          NaN           NaN           NaN   \n",
       "4              NaN             NaN          NaN  7.315388e+09           NaN   \n",
       "\n",
       "   ...  tax_accrued  tax_cash  employees  assets_tangible  jurisdiction_name  \\\n",
       "0  ...          NaN       NaN        NaN              NaN                      \n",
       "1  ...          NaN       NaN        NaN              NaN                      \n",
       "2  ...          NaN       NaN        NaN              NaN                      \n",
       "3  ...          NaN       NaN        NaN              NaN                      \n",
       "4  ...          NaN       NaN        NaN              NaN                      \n",
       "\n",
       "   profit_pretax_0  Country_name  sort_dist_mintax  sort_dist_mintax_rev  \\\n",
       "0              0.0           NaN               NaN                   NaN   \n",
       "1              0.0           NaN               NaN                   NaN   \n",
       "2              0.0           NaN               NaN                   NaN   \n",
       "3              0.0           NaN               NaN                   NaN   \n",
       "4              0.0           NaN               NaN                   NaN   \n",
       "\n",
       "   sort_dist_mintax_profit  \n",
       "0                      NaN  \n",
       "1                      NaN  \n",
       "2                      NaN  \n",
       "3                      NaN  \n",
       "4                      NaN  \n",
       "\n",
       "[5 rows x 60 columns]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Don't add\n",
    "df_country = pd.read_stata(mac_dir_countries + 'country_sortingvariables_16Feb2022.dta')\n",
    "df_link = pd.read_excel(mac_dir_countries+ 'CBCR_countries.xls',)  \n",
    "df_country = pd.merge(df_country,df_link, how = 'left', on= 'country_alpha3' )\n",
    "df_country.head()\n",
    "\n",
    "df_country_rfs = pd.read_stata(mac_dir_countries + 'country_sortingvariables_RFS.dta') \n",
    "df_country_rfs = df_country_rfs[['country_alpha3','sort_dist_mintax', 'sort_dist_mintax_rev', 'sort_dist_mintax_profit']]\n",
    "#df_link = pd.read_excel(mac_dir_countries+ 'CBCR_countries.xls',)  \n",
    "\n",
    "#df_link.to_excel('/Volumes/XTRMQ/Data/MarketIt/'+'country_list.xlsx')\n",
    "df_country = pd.merge(df_country,df_country_rfs, how = 'left', on = 'country_alpha3' )\n",
    "df_country.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "0d8a2c4a-3fa9-4251-8459-d84a5db013f4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Date</th>\n",
       "      <th>dgap_bd</th>\n",
       "      <th>Date_bd</th>\n",
       "      <th>index_event</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2021-01-14</td>\n",
       "      <td>0 days</td>\n",
       "      <td>2021-01-14</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2021-01-15</td>\n",
       "      <td>0 days</td>\n",
       "      <td>2021-01-15</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2021-03-31</td>\n",
       "      <td>0 days</td>\n",
       "      <td>2021-03-31</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2021-06-05</td>\n",
       "      <td>2 days</td>\n",
       "      <td>2021-06-07</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2021-07-01</td>\n",
       "      <td>0 days</td>\n",
       "      <td>2021-07-01</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Date dgap_bd    Date_bd  index_event\n",
       "0 2021-01-14  0 days 2021-01-14            1\n",
       "1 2021-01-15  0 days 2021-01-15            1\n",
       "2 2021-03-31  0 days 2021-03-31            1\n",
       "3 2021-06-05  2 days 2021-06-07            1\n",
       "4 2021-07-01  0 days 2021-07-01            1"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# don't add\n",
    "\n",
    "df_events = pd.read_excel( mac_dir_taxevents+'OECD Global Tax Reforms_v3_list.xlsx')\n",
    "df_events['Date'] = pd.to_datetime(df_events['Date'])\n",
    "df_events = df_events.drop_duplicates(subset =['Date'])\n",
    "df_events = df_events[['Date','Event Name']].copy()\n",
    "\n",
    "df_events = df_events.drop_duplicates( )\n",
    "\n",
    "#  Shifting the announcement date to be the next trading day\n",
    "# CRSP dates\n",
    "crsp_dats = pd.read_stata(mac_dir_daily + 'crsp_dats.dta')\n",
    "crsp_dats = crsp_dats.drop(['index'], axis = 1)\n",
    "crsp_dats['date'] = pd.to_datetime(crsp_dats['date'])\n",
    "\n",
    "uniq_events = df_events[['Date']].drop_duplicates()\n",
    "\n",
    "# Create up to 5 days post rdq relative to rdq\n",
    "for i in range(0, 4):\n",
    "    uniq_events[i] = uniq_events.Date + datetime.timedelta(days=i)\n",
    "\n",
    "# reshape (transpose) for later join with crsp trading dates\n",
    "expand_events = uniq_events.set_index('Date').stack().reset_index().rename(columns={'level_1':'post', 0:'post_date'})\n",
    "\n",
    "# merge with crsp trading dates\n",
    "df_events_bd = pd.merge(expand_events, crsp_dats, how='left', left_on=['post_date'], right_on=['date'])\n",
    "# create the dgap (days gap) variable for min selection\n",
    "df_events_bd['dgap_bd'] =df_events_bd.date - df_events_bd.Date\n",
    "df_events_bd = df_events_bd.reindex(df_events_bd.groupby('Date')['dgap_bd'].idxmin()).rename(columns={'date':'Date_bd'})\n",
    "df_events_bd = df_events_bd[['Date','dgap_bd','Date_bd']].reset_index(drop = True).copy()\n",
    "df_events_bd['index_event'] = 1\n",
    "df_events_bd.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "91d0e675-940d-4102-bb4e-1714399e5cb2",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/cy/3lpv2c157l51wpsxx6rlgtjr0000gn/T/ipykernel_3103/3200349169.py:40: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
      "  df_CDS = df_CDS.append(df_CDS_2).sort_values(['Country_name','date']).reset_index(drop = True)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>Country_name</th>\n",
       "      <th>CDS</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2010-01-18</td>\n",
       "      <td>United Arab Emirates</td>\n",
       "      <td>137.271</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2010-01-19</td>\n",
       "      <td>United Arab Emirates</td>\n",
       "      <td>137.190</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2010-01-20</td>\n",
       "      <td>United Arab Emirates</td>\n",
       "      <td>138.765</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2010-01-21</td>\n",
       "      <td>United Arab Emirates</td>\n",
       "      <td>137.820</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2010-01-22</td>\n",
       "      <td>United Arab Emirates</td>\n",
       "      <td>139.131</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        date          Country_name      CDS\n",
       "0 2010-01-18  United Arab Emirates  137.271\n",
       "1 2010-01-19  United Arab Emirates  137.190\n",
       "2 2010-01-20  United Arab Emirates  138.765\n",
       "3 2010-01-21  United Arab Emirates  137.820\n",
       "4 2010-01-22  United Arab Emirates  139.131"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_CDS = pd.read_csv(mac_dir_cds + 'CdsCmanBbgMid.csv')\n",
    "df_CDS['date'] = pd.to_datetime(df_CDS['date'])\n",
    "df_CDS = df_CDS.rename(columns ={'date':'date'})\n",
    "df_CDS = df_CDS.drop(['Unnamed: 0'], axis =1)\n",
    "df_CDS = df_CDS.rename(columns ={'uk':'United Kingdom'})\n",
    "df_CDS = df_CDS.set_index('date')\n",
    "country_names =df_CDS.columns \n",
    "\n",
    "df_CDS = df_CDS.reset_index()\n",
    "df_CDS =  pd.melt(df_CDS, id_vars=['date'], value_vars=country_names)\n",
    "df_CDS = df_CDS.rename(columns = {'value':'CDS','variable':'Country_name'})\n",
    "df_CDS['Country_name'] = df_CDS['Country_name'].str.replace('_',' ')\n",
    "df_CDS['Country_name']  = [x.title() for x in df_CDS['Country_name']]\n",
    "\n",
    "#df_CDS = pd.merge(df_CDS,df_country, how= 'left', on = 'Country_name' )\n",
    "df_CDS = df_CDS.dropna(subset = ['CDS'])\n",
    "\n",
    "list_countries_main = list(df_CDS['Country_name'].unique())\n",
    "\n",
    "df_CDS_2 = pd.read_excel(mac_dir_cds + 'CdsCbinBbg.xlsx')\n",
    "df_CDS_2['Dates'] = pd.to_datetime(df_CDS_2['Dates'])\n",
    "df_CDS_2 = df_CDS_2.rename(columns ={'Dates':'date'})\n",
    "df_CDS_2 = df_CDS_2.set_index('date')\n",
    "country_names =df_CDS_2.columns \n",
    "\n",
    "df_CDS_2 = df_CDS_2.reset_index()\n",
    "df_CDS_2 =  pd.melt(df_CDS_2, id_vars=['date'], value_vars=country_names)\n",
    "df_CDS_2 = df_CDS_2.rename(columns = {'value':'CDS','variable':'Country_name'})\n",
    "df_CDS_2['Country_name'] = df_CDS_2['Country_name'].str.replace('_',' ')\n",
    "df_CDS_2['Country_name']  = [x.title() for x in df_CDS_2['Country_name']]\n",
    "df_CDS_2 = df_CDS_2.dropna(subset = ['CDS'])\n",
    "\n",
    "list_countries_alt  = list(df_CDS_2['Country_name'].unique())\n",
    "\n",
    "list_add = list(set(list_countries_alt) - set(list_countries_main))\n",
    "\n",
    "df_CDS_2 = df_CDS_2[df_CDS_2['Country_name'].isin(list_add)]\n",
    "\n",
    "\n",
    "df_CDS = df_CDS.append(df_CDS_2).sort_values(['Country_name','date']).reset_index(drop = True)\n",
    "df_CDS[df_CDS['date'].dt.year == 2021]['CDS'].mean()\n",
    "\n",
    "df_CDS['Country_name'] = np.where(df_CDS['Country_name'] =='Abu Dhabi','United Arab Emirates',df_CDS['Country_name'])\n",
    "\n",
    "df_CDS.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "4f26577d-3309-49ae-8043-7adc5614c31d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>Country_name</th>\n",
       "      <th>CDS</th>\n",
       "      <th>index_event</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2597</th>\n",
       "      <td>2020-01-01</td>\n",
       "      <td>United Arab Emirates</td>\n",
       "      <td>35.368</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2598</th>\n",
       "      <td>2020-01-02</td>\n",
       "      <td>United Arab Emirates</td>\n",
       "      <td>34.511</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2599</th>\n",
       "      <td>2020-01-03</td>\n",
       "      <td>United Arab Emirates</td>\n",
       "      <td>36.664</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2600</th>\n",
       "      <td>2020-01-06</td>\n",
       "      <td>United Arab Emirates</td>\n",
       "      <td>37.541</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2601</th>\n",
       "      <td>2020-01-07</td>\n",
       "      <td>United Arab Emirates</td>\n",
       "      <td>37.968</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           date          Country_name     CDS  index_event\n",
       "2597 2020-01-01  United Arab Emirates  35.368            0\n",
       "2598 2020-01-02  United Arab Emirates  34.511            0\n",
       "2599 2020-01-03  United Arab Emirates  36.664            0\n",
       "2600 2020-01-06  United Arab Emirates  37.541            0\n",
       "2601 2020-01-07  United Arab Emirates  37.968            0"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_CDS_events = pd.merge(df_CDS, df_events_bd, how = 'left', left_on = 'date', right_on = 'Date_bd')\n",
    "\n",
    "df_CDS_events['index_event'] = np.where(df_CDS_events['index_event'] == 1, 1,0)\n",
    "df_CDS_events = df_CDS_events[['date','Country_name','CDS','index_event']]\n",
    "df_CDS_events = df_CDS_events[df_CDS_events['date'].dt.year>2019]\n",
    "\n",
    "df_CDS_events.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "a39e2a67-6f44-4acf-809f-72f1d7e7f67c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>date</th>\n",
       "      <th>Country_name</th>\n",
       "      <th>CDS</th>\n",
       "      <th>index_event</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>4771</td>\n",
       "      <td>2020-01-01</td>\n",
       "      <td>Algeria</td>\n",
       "      <td>59.580</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>6011</td>\n",
       "      <td>2020-01-01</td>\n",
       "      <td>Angola</td>\n",
       "      <td>416.575</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8701</td>\n",
       "      <td>2020-01-01</td>\n",
       "      <td>Argentina</td>\n",
       "      <td>3297.615</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>11785</td>\n",
       "      <td>2020-01-01</td>\n",
       "      <td>Australia</td>\n",
       "      <td>15.535</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>14947</td>\n",
       "      <td>2020-01-01</td>\n",
       "      <td>Austria</td>\n",
       "      <td>8.420</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   index       date Country_name       CDS  index_event\n",
       "0   4771 2020-01-01      Algeria    59.580            0\n",
       "1   6011 2020-01-01       Angola   416.575            0\n",
       "2   8701 2020-01-01    Argentina  3297.615            0\n",
       "3  11785 2020-01-01    Australia    15.535            0\n",
       "4  14947 2020-01-01      Austria     8.420            0"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# summary statistics\n",
    "\n",
    "df_CDS_events = pd.read_stata('/Users/robertogomezcram/Dropbox/Cram_Olbert/Submissions and drafts/Journals/RFS/Publication docs/replication_material/Data/CountriesCDS.dta')\n",
    "df_CDS_events =df_CDS_events[ df_CDS_events['date'].dt.year == 2021]\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "2e4def72-4b9f-4713-880f-afaf2858b7c3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>country_alpha2</th>\n",
       "      <th>country_alpha3</th>\n",
       "      <th>country_name</th>\n",
       "      <th>continent</th>\n",
       "      <th>population</th>\n",
       "      <th>population_fill</th>\n",
       "      <th>gdp_capita_ppp</th>\n",
       "      <th>gdp_ppp_rel</th>\n",
       "      <th>gdp_total_WB</th>\n",
       "      <th>gdp_ppp</th>\n",
       "      <th>...</th>\n",
       "      <th>tax_accrued</th>\n",
       "      <th>tax_cash</th>\n",
       "      <th>employees</th>\n",
       "      <th>assets_tangible</th>\n",
       "      <th>jurisdiction_name</th>\n",
       "      <th>profit_pretax_0</th>\n",
       "      <th>Country_name</th>\n",
       "      <th>sort_dist_mintax</th>\n",
       "      <th>sort_dist_mintax_rev</th>\n",
       "      <th>sort_dist_mintax_profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td></td>\n",
       "      <td>CUW</td>\n",
       "      <td>cuw</td>\n",
       "      <td>check</td>\n",
       "      <td>0.155014</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20300.318359</td>\n",
       "      <td>1.212269</td>\n",
       "      <td>2.595822e+09</td>\n",
       "      <td>3.146834e+09</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td></td>\n",
       "      <td>MNP</td>\n",
       "      <td>mnp</td>\n",
       "      <td>check</td>\n",
       "      <td>0.057557</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.182000e+09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td></td>\n",
       "      <td>XKX</td>\n",
       "      <td>xkx</td>\n",
       "      <td>check</td>\n",
       "      <td>1.775378</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10795.495117</td>\n",
       "      <td>2.483643</td>\n",
       "      <td>7.716925e+09</td>\n",
       "      <td>1.916608e+10</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td></td>\n",
       "      <td>MAF</td>\n",
       "      <td>maf</td>\n",
       "      <td>check</td>\n",
       "      <td>0.038659</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td></td>\n",
       "      <td>IMN</td>\n",
       "      <td>imn</td>\n",
       "      <td>check</td>\n",
       "      <td>0.085032</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.315388e+09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 60 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "  country_alpha2 country_alpha3 country_name continent  population  \\\n",
       "0                           CUW          cuw     check    0.155014   \n",
       "1                           MNP          mnp     check    0.057557   \n",
       "2                           XKX          xkx     check    1.775378   \n",
       "3                           MAF          maf     check    0.038659   \n",
       "4                           IMN          imn     check    0.085032   \n",
       "\n",
       "   population_fill  gdp_capita_ppp  gdp_ppp_rel  gdp_total_WB       gdp_ppp  \\\n",
       "0              NaN    20300.318359     1.212269  2.595822e+09  3.146834e+09   \n",
       "1              NaN             NaN          NaN  1.182000e+09           NaN   \n",
       "2              NaN    10795.495117     2.483643  7.716925e+09  1.916608e+10   \n",
       "3              NaN             NaN          NaN           NaN           NaN   \n",
       "4              NaN             NaN          NaN  7.315388e+09           NaN   \n",
       "\n",
       "   ...  tax_accrued  tax_cash  employees  assets_tangible  jurisdiction_name  \\\n",
       "0  ...          NaN       NaN        NaN              NaN                      \n",
       "1  ...          NaN       NaN        NaN              NaN                      \n",
       "2  ...          NaN       NaN        NaN              NaN                      \n",
       "3  ...          NaN       NaN        NaN              NaN                      \n",
       "4  ...          NaN       NaN        NaN              NaN                      \n",
       "\n",
       "   profit_pretax_0  Country_name  sort_dist_mintax  sort_dist_mintax_rev  \\\n",
       "0              0.0           NaN               NaN                   NaN   \n",
       "1              0.0           NaN               NaN                   NaN   \n",
       "2              0.0           NaN               NaN                   NaN   \n",
       "3              0.0           NaN               NaN                   NaN   \n",
       "4              0.0           NaN               NaN                   NaN   \n",
       "\n",
       "   sort_dist_mintax_profit  \n",
       "0                      NaN  \n",
       "1                      NaN  \n",
       "2                      NaN  \n",
       "3                      NaN  \n",
       "4                      NaN  \n",
       "\n",
       "[5 rows x 60 columns]"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_country = pd.read_stata(mac_dir_countries + 'country_sortingvariables_16Feb2022.dta')\n",
    "df_link = pd.read_excel(mac_dir_countries+ 'CBCR_countries.xls',)  \n",
    "df_country = pd.merge(df_country,df_link, how = 'left', on= 'country_alpha3' )\n",
    "df_country_rfs = pd.read_stata(mac_dir_countries + 'country_sortingvariables_RFS.dta') \n",
    "df_country_rfs = df_country_rfs[['country_alpha3','sort_dist_mintax', 'sort_dist_mintax_rev', 'sort_dist_mintax_profit']]\n",
    "#df_link = pd.read_excel(mac_dir_countries+ 'CBCR_countries.xls',)  \n",
    "\n",
    "#df_link.to_excel('/Volumes/XTRMQ/Data/MarketIt/'+'country_list.xlsx')\n",
    "df_country = pd.merge(df_country,df_country_rfs, how = 'left', on = 'country_alpha3' )\n",
    "df_country.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "id": "71d72d6c-99be-4872-aae3-e00d344d0b51",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_country = pd.read_stata(mac_dir_countries + 'country_sortingvariables_16Feb2022.dta')\n",
    "df_link = pd.read_excel(mac_dir_countries+ 'CBCR_countries.xls',)  \n",
    "df_country = pd.merge(df_country,df_link, how = 'left', on= 'country_alpha3' )\n",
    "\n",
    "df_country_rfs = pd.read_stata(mac_dir_countries + 'country_sortingvariables_RFS.dta') \n",
    "df_country_rfs = df_country_rfs[['country_alpha3','sort_dist_mintax', 'sort_dist_mintax_rev', 'sort_dist_mintax_profit']]\n",
    "#df_link = pd.read_excel(mac_dir_countries+ 'CBCR_countries.xls',)  \n",
    "\n",
    "#df_link.to_excel('/Volumes/XTRMQ/Data/MarketIt/'+'country_list.xlsx')\n",
    "df_country = pd.merge(df_country,df_country_rfs, how = 'left', on = 'country_alpha3' )\n",
    "df_country.head(5)\n",
    "variables_to_describe = ['Country_name', 'split_haven_list_all','cbcr_rev_to_consumption','sort_dist_mintax','cbcr_pretax_margin',\n",
    "                     'cbcr_profit_to_consumption',\n",
    "                     'cbcr_tax_cash_to_consumption',\n",
    "                     'cbcr_entities_to_population']\n",
    "\n",
    "df_country['sort_dist_mintax'] = df_country['sort_dist_mintax'] *df_country['cbcr_rev_to_consumption']\n",
    "\n",
    "df_country = df_country[['Country_name', 'split_haven_list_all','cbcr_rev_to_consumption','sort_dist_mintax','cbcr_pretax_margin',\n",
    "                     'cbcr_profit_to_consumption',\n",
    "                     'cbcr_tax_cash_to_consumption',\n",
    "                     'cbcr_entities_to_population']].dropna().sort_values(['Country_name']).head()\n",
    "\n",
    "df_country.to_stata('/Users/robertogomezcram/Dropbox/Cram_Olbert/Submissions and drafts/Journals/RFS/Publication docs/replication_material/Data/CountryExposureMeasures.dta')\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f05e652e-e288-42e4-8147-8ebaeee54485",
   "metadata": {},
   "source": [
    "# summary statistics CDS countries\n",
    "\n",
    "-- Include"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2f8f22af-44a2-4726-96aa-82a5c0d1e5cd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# summary statistics CDS countries\n",
    "df_country = pd.read_stata('CountryExposureMeasures.dta')\n",
    "df_CDS_events =  pd.read_stata('CountriesCDS.dta')\n",
    "\n",
    "df_CDS_events =df_CDS_events[ df_CDS_events['date'].dt.year == 2021].copy()\n",
    "df_CDS_temp  = pd.merge(df_CDS_events,df_country[['Country_name','split_haven_list_all']], how = 'left', on =['Country_name'] )\n",
    "df_CDS_temp['changes'] = df_CDS_temp.groupby(['Country_name'])['CDS'].pct_change()*100\n",
    "df_total = pd.DataFrame(df_CDS_temp['changes'].describe(percentiles = [0.05,0.25,0.5,0.75,0.95]))\n",
    "df_total.columns = ['Total']\n",
    "\n",
    "df_total_split = df_CDS_temp.groupby(['split_haven_list_all'])['changes'].describe(percentiles = [0.05,0.25,0.5,0.75,0.95]).T\n",
    "df_total_split.columns = ['Non-tax haven', 'Tax haven']\n",
    "\n",
    "df_total = pd.merge(df_total,df_total_split, how = 'inner', left_index = True, right_index = True ).T\n",
    "df_total = df_total.drop(['min','max'], axis = 1)\n",
    "\n",
    "select_countries = ['Usa','Germany','Mexico','India','Ireland','Hong Kong']\n",
    "\n",
    "# USA, Germany, Mexico, India, Ireland, Singapore, Bahamas\n",
    "df_temp = df_CDS_events[df_CDS_events['Country_name'].isin(select_countries)]\n",
    "df_temp = df_temp.groupby('Country_name')['CDS'].describe(percentiles = [0.05,0.25,0.5,0.75,0.95])\n",
    "df_temp = df_temp.drop(['min','max'], axis = 1)\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "3c41dda1-102a-4e45-8c01-fabd450828ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summary statistics exposure measures\n",
    "variables_to_describe = ['cbcr_rev_to_consumption','sort_dist_mintax','cbcr_pretax_margin']\n",
    "                 \n",
    "perc =[.05, .25, .50, .75,0.95]\n",
    "    \n",
    "df_statistics_all = pd.DataFrame()\n",
    "for index_cbcr in variables_to_describe:\n",
    "  \n",
    "    df_country_des = df_country.copy()    \n",
    "       \n",
    "    df_statistics = df_country_des[[index_cbcr]].describe( percentiles = perc).T\n",
    "    df_statistics = df_statistics.drop(['min','max'], axis = 1).reset_index()\n",
    "    df_statistics = df_statistics.rename(columns = {'index':'Variable'})\n",
    "\n",
    "    # Tax haven versus not tax haven\n",
    "    df_statistics_haven =df_country_des.groupby(['split_haven_list_all'])[index_cbcr].describe( percentiles = perc)\n",
    "    df_statistics_haven = df_statistics_haven.drop(['min','max'], axis = 1).reset_index()\n",
    "    df_statistics_haven = df_statistics_haven.rename(columns = {'split_haven_list_all':'Variable'})\n",
    "    df_statistics_haven['Variable'] = index_cbcr + '_haven' + '_' +  df_statistics_haven['Variable'].astype(int).astype(str)\n",
    "\n",
    "    df_statistics_all = df_statistics_all.append(df_statistics.append(df_statistics_haven))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "3d3af6ca-6088-41a1-859c-abe987fcb9a5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scatter plot\n",
    "fig, ax = plt.subplots(1, 1 , figsize=(15,6), sharex=True, sharey=False)\n",
    "    \n",
    "list_countries = [ 'Bahamas', 'United States', 'China','India', 'Germany',   'United Kingdom',  'Brazil',  'Hong Kong', 'Ireland', 'Singapore', 'Luxembourg']\n",
    "\n",
    "df_plot = df_country[df_country['Country_name'].isin(list_countries)].copy()\n",
    "\n",
    "df_plot['household_consumption_ppp_plot'] = df_plot['household_consumption_ppp']/1000000000\n",
    "ax = sns.scatterplot(x=\"household_consumption_ppp_plot\", y='cbcr_rev_to_consumption', markers=\"o\", s=200, data=df_plot, color = 'darkblue') \n",
    "\n",
    "for ii, rows in df_plot.iterrows():\n",
    "    rows\n",
    "    x_val = rows['household_consumption_ppp_plot']\n",
    "    y_val = rows['cbcr_rev_to_consumption']\n",
    "    word = rows['Country_name']\n",
    "    if (word  !='Singapore') &  (word  !='Bahamas'):\n",
    "        plt.text(x_val+0, y_val+0.1, word, fontsize=12)\n",
    "    if (word  =='Singapore') | (word  =='Bahamas'):\n",
    "        plt.text(x_val+250, y_val-0.2, word, fontsize=12)  \n",
    "        \n",
    "     \n",
    "ax.spines['top'].set_visible(False)\n",
    "ax.spines['right'].set_visible(False)\n",
    "ax.spines['bottom'].set_visible(True)\n",
    "ax.spines['left'].set_visible(True)\n",
    "ax.tick_params(axis='both', which='major', labelsize=15)\n",
    "ax.tick_params(axis='both', which='minor', labelsize=8)\n",
    "\n",
    "plt.xlabel('Market size (household consumption in billions, PPP)', size=18)\n",
    "plt.ylabel('MNC Revenues to Market Size', size=18)\n",
    "\n",
    "plt.tight_layout()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "c4c1abf5-dce8-41be-8de0-8fb35fa8a6e1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# regression table\n",
    "\n",
    "df_cds = pd.read_stata('CountriesCDS.dta')\n",
    "df_cds_sorts = pd.merge(df_cds,df_country, how = 'left', on ='Country_name' )\n",
    "\n",
    "\n",
    "exog_var_results = ['const','const_T',\n",
    "                    'const_events','const_events_T',\n",
    "                    'b','b_T',                    \n",
    "                    'b_events','b_events_T', 'R2', 'nObs']\n",
    "\n",
    "\n",
    "columns_sorts = ['cbcr_rev_to_consumption', 'sort_dist_mintax','cbcr_pretax_margin']\n",
    "\n",
    "coef_results = pd.DataFrame(columns = columns_sorts, index = exog_var_results )\n",
    "\n",
    "for index_reg in coef_results.columns:\n",
    "\n",
    "    df_reg = df_cds_sorts[['CumCDS_0', 'Country_name','date_test','index_event',index_reg]].dropna()\n",
    "    \n",
    "    df_reg[index_reg]     = (df_reg[index_reg]   - df_reg[index_reg].mean() )/ df_reg[index_reg].std() \n",
    "    df_reg = df_reg.set_index(['Country_name','date_test'])  \n",
    "    df_reg['Const'] = 1\n",
    "    df_reg['index_event_X'] = df_reg['index_event']* df_reg[index_reg]\n",
    "\n",
    "    exog_var = ['Const','index_event',index_reg,'index_event_X']\n",
    "\n",
    "    exog = df_reg[exog_var ]\n",
    "    enog  = df_reg['CumCDS_0'] *100 # in basis points\n",
    "    mod = PanelOLS(enog, exog)\n",
    "    results = mod.fit(cov_type='clustered' , cluster_entity = True, cluster_time=True)\n",
    "\n",
    "    params = results.params\n",
    "    tvalues = results.tstats\n",
    "    nobs = results.nobs\n",
    "    R2 = results.rsquared*100\n",
    "    \n",
    "    coef_results[index_reg].loc['const'] = params[0]\n",
    "    coef_results[index_reg].loc['const_T'] = tvalues[0]    \n",
    "\n",
    "    coef_results[index_reg].loc['const_events'] = params[1]\n",
    "    coef_results[index_reg].loc['const_events_T'] = tvalues[1]  \n",
    "\n",
    "    coef_results[index_reg].loc['b'] = params[2]\n",
    "    coef_results[index_reg].loc['b_T'] = tvalues[2] \n",
    "\n",
    "    coef_results[index_reg].loc['b_events'] = params[3]\n",
    "    coef_results[index_reg].loc['b_events_T'] = tvalues[3]  \n",
    "    coef_results[index_reg].loc['R2'] = R2\n",
    "    coef_results[index_reg].loc['nObs'] = nobs\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "78cb8074-4882-4d06-827f-b04fd182878a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# scatter plot\n",
    "d_plot_1 = df_cds_sorts.copy()\n",
    "select_var = 'cbcr_rev_to_consumption'\n",
    "\n",
    "d_plot_1['CumCDS_0'] = d_plot_1['CumCDS_0']  \n",
    "d_plot_1['quantile_ex_3'] = pd.qcut(d_plot_1[select_var],10,duplicates='drop')\n",
    "d_plot_1 = d_plot_1.groupby(['quantile_ex_3','index_event'])[[select_var,'CumCDS_0']].mean().reset_index()\n",
    "\n",
    "\n",
    "df_plot = d_plot_1.pivot(index='quantile_ex_3', columns= 'index_event', values=['CumCDS_0', select_var ])\n",
    "df_plot.columns =[ 'ret_0', 'ret_1', 'x_0', 'x_1' ]\n",
    "\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(10, 10))\n",
    "x = df_plot['x_1']\n",
    "y = df_plot['ret_1']\n",
    "\n",
    "ax.scatter(x , y,s=100, color = 'darkblue')\n",
    "m, b = np.polyfit(x, y, 1)\n",
    "plt.plot(x, m*x+b,color ='darkblue',linewidth=3)\n",
    "\n",
    "x = df_plot['x_0']\n",
    "y = df_plot['ret_0']\n",
    "\n",
    "ax.scatter(x , y,s=100, color = 'red')\n",
    "m, b = np.polyfit(x, y, 1)\n",
    "plt.plot(x, m*x+b,color ='red',linewidth=3)\n",
    "\n",
    "\n",
    "plt.ylabel('Changes in CDS in percentage points', fontsize='22')\n",
    "\n",
    "if select_var == 'cbcr_rev_to_consumption':\n",
    "    plt.xlabel('MNC Revenues to Market Size', fontsize='22')\n",
    "    name_figure = 'fig_cbcr_rev_to_consumption_CDS.pdf'\n",
    "    plt.legend(labels=['Tax event dates','Other dates'], loc='upper left'  , fontsize='20',frameon=False)\n",
    "\n",
    "if select_var == 'cbcr_pretax_margin':\n",
    "    plt.xlabel('MNC Taxable Income Margin', fontsize='22')\n",
    "    name_figure = 'fig_cbcr_pretax_margin.pdf'\n",
    "        \n",
    "\n",
    "ax.spines['top'].set_visible(False)\n",
    "ax.spines['right'].set_visible(False)  \n",
    "ax.xaxis.set_tick_params(labelsize=20)\n",
    "ax.yaxis.set_tick_params(labelsize=20)\n",
    "\n",
    "plt.tight_layout()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "id": "0df9543a-1ce9-456c-a495-e32af025c049",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Event-study plot\n",
    "df_cum_cds = pd.read_stata('CountriesCumulativeCDS.dta')\n",
    "\n",
    "df_cds_sorts = pd.merge(df_cds_all,df_country, how = 'left', on ='Country_name' )\n",
    "\n",
    "select_var = 'cbcr_pretax_margin'\n",
    "n_bins = 10\n",
    "break_low = 10\n",
    "break_high = 90\n",
    "\n",
    "d_plot_reg_event = df_cds_sorts[df_cds_sorts['index_event'] == 1].copy()\n",
    "break_low_df = d_plot_reg_event[[select_var]].describe(percentiles=[break_low/100]).T[[ str(break_low) + '%']]\n",
    "break_low_df = break_low_df.rename(columns={str(break_low) + '%':'Low'})\n",
    "break_high_df = d_plot_reg_event[[select_var]].describe(percentiles=[break_high/100]).T[[ str(break_high) + '%']]\n",
    "break_high_df = break_high_df.rename(columns={str(break_high) + '%':'High'})\n",
    "breaks_temp = pd.merge(break_low_df, break_high_df, how = 'inner', left_index = True, right_index = True)\n",
    "d_plot_reg_event['bins'] = 0 \n",
    "d_plot_reg_event['bins'] = np.where(d_plot_reg_event[select_var] <= breaks_temp['Low'][0], -1, d_plot_reg_event['bins'])    \n",
    "d_plot_reg_event['bins'] = np.where(d_plot_reg_event[select_var] > breaks_temp['High'][0], 1, d_plot_reg_event['bins'])   \n",
    "\n",
    "\n",
    "d_plot_reg_not_event = df_cds_sorts[df_cds_sorts['index_event'] == 0].copy()\n",
    "break_low_df = d_plot_reg_not_event[[select_var]].describe(percentiles=[break_low/100]).T[[ str(break_low) + '%']]\n",
    "break_low_df = break_low_df.rename(columns={str(break_low) + '%':'Low'})\n",
    "break_high_df = d_plot_reg_not_event[[select_var]].describe(percentiles=[break_high/100]).T[[ str(break_high) + '%']]\n",
    "break_high_df = break_high_df.rename(columns={str(break_high) + '%':'High'})\n",
    "breaks_temp = pd.merge(break_low_df, break_high_df, how = 'inner', left_index = True, right_index = True)\n",
    "d_plot_reg_not_event['bins'] = 0 \n",
    "d_plot_reg_not_event['bins'] = np.where(d_plot_reg_not_event[select_var] <= breaks_temp['Low'][0], -1, d_plot_reg_not_event['bins'])    \n",
    "d_plot_reg_not_event['bins'] = np.where(d_plot_reg_not_event[select_var] > breaks_temp['High'][0], 1, d_plot_reg_not_event['bins'])   \n",
    "\n",
    "# compute changes in cds around the event\n",
    "days_announcements = list(d_plot_reg_not_event['ncount'].unique())\n",
    "\n",
    "results_df_all_diff_not_event = pd.DataFrame()\n",
    "for index_days in days_announcements:\n",
    "\n",
    "    df_bins_reg = d_plot_reg_not_event[d_plot_reg_not_event['ncount'] == index_days].copy()\n",
    "    df_bins_reg = df_bins_reg[df_bins_reg['bins'] != 0]\n",
    "    df_bins_reg['Dummy_exposed'] = np.where(df_bins_reg['bins']==1,1,0 )\n",
    "    df_plot_reg = df_bins_reg[['Country_name','date_test','CDS_norm','Dummy_exposed'] ]\n",
    "    df_plot_reg = df_plot_reg.set_index(['Country_name','date_test']) \n",
    "    df_plot_reg['Const'] = 1\n",
    "    exog_var  = ['Const','Dummy_exposed']\n",
    "    df_plot_reg = df_plot_reg.dropna(subset = exog_var + ['CDS_norm'])\n",
    "    exog = df_plot_reg[exog_var ]\n",
    "    enog  = df_plot_reg['CDS_norm']*100 \n",
    "    mod = PanelOLS(enog, exog)\n",
    "    results = mod.fit(cov_type='clustered' , cluster_entity = True, cluster_time=True)\n",
    "    params_est =pd.DataFrame(results.params)  \n",
    "    tvals_est = pd.DataFrame(results.tstats) \n",
    "    std_errors = pd.DataFrame(results.std_errors) \n",
    "    results_df = pd.merge(params_est,tvals_est, how = 'inner', left_index = True, right_index = True)\n",
    "    results_df = pd.merge(results_df,std_errors, how = 'inner', left_index = True, right_index = True).T\n",
    "    results_df['days'] = index_days\n",
    "\n",
    "    results_df_all_diff_not_event = results_df_all_diff_not_event.append(results_df)\n",
    "    \n",
    "    \n",
    "results_df_all_diff_event = pd.DataFrame()\n",
    "for index_days in days_announcements:\n",
    "\n",
    "    df_bins_reg = d_plot_reg_event[d_plot_reg_event['ncount'] == index_days].copy()\n",
    "    df_bins_reg = df_bins_reg[df_bins_reg['bins'] != 0]\n",
    "    df_bins_reg['Dummy_exposed'] = np.where(df_bins_reg['bins']==1,1,0 )\n",
    "    df_plot_reg = df_bins_reg[['Country_name','date_test','CDS_norm','Dummy_exposed'] ]\n",
    "    df_plot_reg = df_plot_reg.set_index(['Country_name','date_test']) \n",
    "    df_plot_reg['Const'] = 1\n",
    "    exog_var  = ['Const','Dummy_exposed']\n",
    "    df_plot_reg = df_plot_reg.dropna(subset = exog_var + ['CDS_norm'])\n",
    "    exog = df_plot_reg[exog_var ]\n",
    "    enog  = df_plot_reg['CDS_norm']*100 \n",
    "    mod = PanelOLS(enog, exog)\n",
    "    results = mod.fit(cov_type='clustered' , cluster_entity = True, cluster_time=True)\n",
    "\n",
    "    params_est =pd.DataFrame(results.params)  \n",
    "    tvals_est = pd.DataFrame(results.tstats) \n",
    "    std_errors = pd.DataFrame(results.std_errors) \n",
    "\n",
    "    results_df = pd.merge(params_est,tvals_est, how = 'inner', left_index = True, right_index = True)\n",
    "    results_df = pd.merge(results_df,std_errors, how = 'inner', left_index = True, right_index = True).T\n",
    "    results_df['days'] = index_min\n",
    "\n",
    "    results_df_all_diff_event = results_df_all_diff_event.append(results_df)\n",
    "    \n",
    "    \n",
    "df_t = results_df_all_diff_not_event[['days', 'Dummy_exposed']]\n",
    "df_t_param = df_t[df_t.index == 'parameter'].reset_index(drop = True).set_index(['days'])\n",
    "df_t_param.columns = ['d']\n",
    "df_t_param = df_t_param.reset_index()\n",
    "df_t_se = df_t[df_t.index == 'std_error'].reset_index(drop = True).set_index(['days'])\n",
    "df_t_se.columns = ['d_se']\n",
    "df_t_se = df_t_se.reset_index()\n",
    "df_t_2 = pd.merge(df_t_param, df_t_se, how = 'outer', on = 'days')\n",
    "df_t_2['lb'] = df_t_2['d'] - 1.96*df_t_2['d_se']\n",
    "df_t_2['ub'] = df_t_2['d']+1.96*df_t_2['d_se']\n",
    "df_t_2['days'] = df_t_2['days']\n",
    "df_plot_diff_not_event = df_t_2.copy().sort_values('days')\n",
    "\n",
    "  \n",
    "\n",
    "df_t = results_df_all_diff_event[['days', 'Dummy_exposed']]\n",
    "df_t_param = df_t[df_t.index == 'parameter'].reset_index(drop = True).set_index(['days'])\n",
    "df_t_param.columns = ['d']\n",
    "df_t_param = df_t_param.reset_index()\n",
    "df_t_se = df_t[df_t.index == 'std_error'].reset_index(drop = True).set_index(['days'])\n",
    "df_t_se.columns = ['d_se']\n",
    "df_t_se = df_t_se.reset_index()\n",
    "df_t_2 = pd.merge(df_t_param, df_t_se, how = 'outer', on = 'days')\n",
    "df_t_2['lb'] = df_t_2['d'] - 1.96*df_t_2['d_se']\n",
    "df_t_2['ub'] = df_t_2['d']+1.96*df_t_2['d_se']\n",
    "df_t_2['days'] = df_t_2['days']\n",
    "df_plot_diff_event = df_t_2.copy().sort_values('days')    \n",
    "\n",
    "# event-study ploy\n",
    "fig, ax = plt.subplots(1, 1 , figsize=(16,6), sharex=True, sharey=False)\n",
    "\n",
    "xdata = df_plot_diff_event['days']\n",
    "ydata = df_plot_diff_event['d'] \n",
    "ydata_lb = df_plot_diff_event['ub'] \n",
    "ydata_ub = df_plot_diff_event['lb'] \n",
    "plt.plot(xdata, ydata, '-',color = 'darkblue', linewidth=3)\n",
    "\n",
    "\n",
    "xdata_ne = df_plot_diff_not_event['days']\n",
    "ydata_ne = df_plot_diff_not_event['d'] \n",
    "ydata_lb_ne = df_plot_diff_not_event['ub'] \n",
    "ydata_ub_ne = df_plot_diff_not_event['lb'] \n",
    "\n",
    "plt.fill_between(xdata, ydata_lb, ydata_ub, color='darkblue', alpha=0.1)\n",
    "plt.axhline(y=0,linestyle =':',color='black',label=r'Announcement',lw=2) \n",
    "plt.axvline(x=0,linestyle =':',color='black',label=r'Announcement',lw=2) \n",
    "\n",
    "\n",
    "plt.ylim([-2,5])\n",
    "plt.xlim([-4,12])\n",
    "\n",
    "\n",
    "plt.ylabel('Cumulative changes in CDS  \\n in percent', fontsize='20')\n",
    "plt.xlabel('Days since event', fontsize='20')\n",
    "ax.spines['top'].set_visible(False)\n",
    "ax.spines['right'].set_visible(False)  \n",
    "ax.xaxis.set_tick_params(labelsize=18)\n",
    "ax.yaxis.set_tick_params(labelsize=18)\n",
    "\n",
    "plt.legend(labels=['Tax event dates','Other dates'], loc='upper left'  , fontsize='20',frameon=False)\n",
    "\n",
    "plt.tight_layout()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
